library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
library(ipumsr)
library(sf)
library(terra)
library(ggspatial)
library(lubridate)
Note: You will need to update this file name to match your DHS extract number!
# Load the metadata file
ke_ddi <- read_ipums_ddi("data/idhs_00023.xml") # Use your own file name here
# Use metadata to load the data correctly
ke_dhs <- read_ipums_micro(ke_ddi)
## Use of data from IPUMS DHS is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
ke_dhs
## # A tibble: 20,964 × 53
## SAMPLE SAMPLESTR COUNTRY YEAR IDHSPID IDHSHID DHSID IDHSPSU
## <int+lbl> <chr+lbl> <int+lbl> <int> <chr> <chr> <chr> <dbl>
## 1 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 2 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 3 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 4 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 5 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 6 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 7 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 8 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 9 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## 10 40406 [Kenya 2014] 40406 [Keny… 404 [Ken… 2014 40406 … 40406 … KE20… 4.04e10
## # ℹ 20,954 more rows
## # ℹ 45 more variables: IDHSSTRATA <dbl>, CASEID <chr>, HHID <chr>, PSU <dbl>,
## # STRATA <dbl>, DOMAIN <dbl>, HHNUM <dbl>, CLUSTERNO <dbl>, LINENO <int>,
## # BIDX <int>, PERWEIGHT <dbl>, KIDWT <dbl>, AWFACTT <dbl>, AWFACTU <dbl>,
## # AWFACTR <dbl>, AWFACTE <dbl>, AWFACTW <dbl>, DVWEIGHT <dbl>,
## # URBAN <int+lbl>, GEO_KE1989_2014 <int+lbl>, GEO_KE2014 <int+lbl>,
## # GEOALT_KE2014 <int+lbl>, AGE <int>, AGE5YEAR <int+lbl>, …
colnames(ke_dhs)
## [1] "SAMPLE" "SAMPLESTR" "COUNTRY" "YEAR"
## [5] "IDHSPID" "IDHSHID" "DHSID" "IDHSPSU"
## [9] "IDHSSTRATA" "CASEID" "HHID" "PSU"
## [13] "STRATA" "DOMAIN" "HHNUM" "CLUSTERNO"
## [17] "LINENO" "BIDX" "PERWEIGHT" "KIDWT"
## [21] "AWFACTT" "AWFACTU" "AWFACTR" "AWFACTE"
## [25] "AWFACTW" "DVWEIGHT" "URBAN" "GEO_KE1989_2014"
## [29] "GEO_KE2014" "GEOALT_KE2014" "AGE" "AGE5YEAR"
## [33] "RESIDENT" "RELIGION" "MARSTAT" "CHEB"
## [37] "FLOOR" "TOILETTYPE" "DRINKWTR" "CURRWORK"
## [41] "WEALTHQ" "WEALTHS" "EDUCLVL" "EDYRTOTAL"
## [45] "HEIGHTFEM" "KIDSEX" "KIDDOBCMC" "LINENOKID"
## [49] "HWHAZWHO" "BIRTHWT" "BIRTHWTREF" "FEVRECENT"
## [53] "DIARRECENT"
ke_dhs[98, c(1, 2, 3)]
## # A tibble: 1 × 3
## SAMPLE SAMPLESTR COUNTRY
## <int+lbl> <chr+lbl> <int+lbl>
## 1 40406 [Kenya 2014] 40406 [Kenya 2014] 404 [Kenya]
ke_dhs[, "DHSID"]
## # A tibble: 20,964 × 1
## DHSID
## <chr>
## 1 KE201400000001
## 2 KE201400000001
## 3 KE201400000001
## 4 KE201400000001
## 5 KE201400000001
## 6 KE201400000001
## 7 KE201400000001
## 8 KE201400000001
## 9 KE201400000001
## 10 KE201400000001
## # ℹ 20,954 more rows
ke_dhs$RELIGION
## <labelled<integer>[20964]>: Religion
## [1] 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300
## [15] 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300 2300
## [29] 2300 2100 2100 2300 2300 2300 2300 2300 2300 2300 2100 2300 2300 2300
## [43] 2100 2300 2300 2100 2100 2300 2300 2300 2300 2300 2300 2100 2300 2300
....
# Display variable information for all variables in our file
ipums_var_info(ke_dhs)
## # A tibble: 53 × 4
## var_name var_label var_desc val_labels
## <chr> <chr> <chr> <list>
## 1 SAMPLE IPUMS-DHS sample identifier "SAMPLE… <tibble>
## 2 SAMPLESTR IPUMS-DHS sample identifier (string) "SAMPLE… <tibble>
## 3 COUNTRY Country "COUNTR… <tibble>
## 4 YEAR Year of sample "YEAR r… <tibble>
## 5 IDHSPID Unique cross-sample respondent identifier "IDHSPI… <tibble>
## 6 IDHSHID Unique cross-sample household identifier "IDHSHI… <tibble>
## 7 DHSID Key to link DHS clusters to context data (str… "DHSID … <tibble>
## 8 IDHSPSU Unique sample-case PSU identifier "IDHSPS… <tibble>
## 9 IDHSSTRATA Unique cross-sample sampling strata "IDHSST… <tibble>
## 10 CASEID Sample-specific respondent identifier "CASEID… <tibble>
## # ℹ 43 more rows
ipums_var_desc(ke_dhs$BIRTHWT)
## [1] "For children born in the three to five years before the survey, BIRTHWT (M19) reports the child's birthweight in kilos with three implied decimal places (or, alternatively stated, in grams with no decimal places). Children who were not weighed are coded 9996."
ipums_val_labels(ke_dhs$RELIGION)
## # A tibble: 64 × 2
## val lbl
## <int> <chr>
## 1 0 NO RELIGION
## 2 1000 MUSLIM
## 3 2000 CHRISTIAN
## 4 2100 Catholic
## 5 2200 Orthodox
## 6 2300 Protestant
## 7 2310 Lutheran
## 8 2320 Anglican
## 9 2330 Presbyterian
## 10 2340 Baptist/Seventh-day Adventist
## # ℹ 54 more rows
ipums_val_labels(ke_dhs$BIRTHWT)
## # A tibble: 5 × 2
## val lbl
## <int> <chr>
## 1 9995 9995+
## 2 9996 Not weighed at birth
## 3 9997 Don't know
## 4 9998 Missing
## 5 9999 NIU (not in universe)
ke_dhs$BIRTHWT[1:5]
## <labelled<integer>[5]>: Birthweight in kilos
## [1] 9999 9999 3200 3500 9999
##
## Labels:
## value label
## 9995 9995+
## 9996 Not weighed at birth
## 9997 Don't know
## 9998 Missing
## 9999 NIU (not in universe)
# For values greater than 9995, convert to NA (missing)
# Otherwise, divide by 1000 to convert decimal places appropriately
birthwt_clean <- if_else(
ke_dhs$BIRTHWT > 9995, # Condition
NA, # Replacement if TRUE
ke_dhs$BIRTHWT / 1000 # Replacement if FALSE
)
birthwt_clean[1:5]
## [1] NA NA 3.2 3.5 NA
ke_dhs <- ke_dhs |>
mutate(BIRTHWT = if_else(BIRTHWT > 9995, NA, BIRTHWT / 1000))
ke_dhs$BIRTHWT[1:5]
## [1] NA NA 3.2 3.5 NA
ke_dhs <- ke_dhs |>
mutate(
HEIGHTFEM = if_else(HEIGHTFEM >= 9994, NA, HEIGHTFEM / 10),
BIRTHWTREF = if_else(BIRTHWTREF >= 7, NA, BIRTHWTREF)
)
ke_dhs$EDUCLVL[1:5]
## <labelled<integer>[5]>: Highest educational level
## [1] 2 2 2 2 1
##
## Labels:
## value label
## 0 No education
## 1 Primary
## 2 Secondary
## 3 Higher
## 8 Missing
case_when(
ke_dhs$EDUCLVL == 8 ~ NA,
ke_dhs$EDUCLVL >= 2 ~ "Secondary+",
ke_dhs$EDUCLVL == 1 ~ "Primary",
TRUE ~ "None"
)
## [1] "Secondary+" "Secondary+" "Secondary+" "Secondary+" "Primary"
## [6] "Primary" "Primary" "Secondary+" "Secondary+" "Secondary+"
## [11] "Secondary+" "Secondary+" "Secondary+" "Primary" "Primary"
## [16] "Secondary+" "Secondary+" "Secondary+" "Secondary+" "Primary"
## [21] "Primary" "Primary" "Primary" "Secondary+" "Secondary+"
....
ke_dhs <- ke_dhs |>
mutate(
EDUCLVL = case_when(
EDUCLVL == 8 ~ NA,
EDUCLVL >= 2 ~ "Secondary+",
EDUCLVL == 1 ~ "Primary",
TRUE ~ "None"
)
)
Note: this is not displayed in slides but should be run in demo…demo code to be included in this file?
ke_dhs <- ke_dhs |>
mutate(
FLOOR = case_when(
FLOOR >= 996 ~ NA,
FLOOR >= 300 ~ "Finished",
TRUE ~ "Unfinished"
),
TOILETTYPE = case_when(
TOILETTYPE == 0 ~ "No facility",
TOILETTYPE >= 9996 ~ NA,
TOILETTYPE >= 2000 ~ "Non-flush",
TRUE ~ "Flush"
),
DRINKWTR = case_when(
DRINKWTR >= 9996 ~ NA,
DRINKWTR >= 4000 ~ "Other",
DRINKWTR >= 3000 ~ "Surface",
DRINKWTR >= 2000 ~ "Well",
TRUE ~ "Piped"
),
FEVRECENT = case_when(
FEVRECENT >= 97 ~ NA,
FEVRECENT >= 20 ~ "Yes",
TRUE ~ "No"
),
DIARRECENT = case_when(
DIARRECENT >= 97 ~ NA,
DIARRECENT >= 20 ~ "Yes",
TRUE ~ "No"
)
)
# str_squish() removes excess whitespace in the ID strings
ke_dhs <- ke_dhs |>
mutate(KIDID = str_squish(paste(IDHSPID, BIDX)))
ke_dhs$KIDID
## [1] "40406 0001019 02 1" "40406 0001019 03 1" "40406 0001033 02 1"
## [4] "40406 0001033 02 2" "40406 0001037 02 1" "40406 0001037 02 2"
## [7] "40406 0001041 02 1" "40406 0001059 02 1" "40406 0001059 02 2"
## [10] "40406 0001059 02 3" "40406 0001063 02 1" "40406 0001072 02 1"
## [13] "40406 0001099 02 1" "40406 0002006 01 1" "40406 0002006 02 1"
....
ke_dhs <- ke_dhs |>
filter(RESIDENT == 1)
ke_dhs$RESIDENT
## <labelled<integer>[20477]>: Usual resident or visitor
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [37] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [73] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [109] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
....
ke_dhs <- ke_dhs |>
filter(!is.na(BIRTHWT))
ggplot(ke_dhs) +
geom_histogram(aes(x = BIRTHWT), bins = 70)
ke_dhs <- as_factor(ke_dhs) # Convert all labeled columns to factors
ke_dhs$RESIDENT
## [1] Usual resident Usual resident Usual resident Usual resident
## [5] Usual resident Usual resident Usual resident Usual resident
## [9] Usual resident Usual resident Usual resident Usual resident
## [13] Usual resident Usual resident Usual resident Usual resident
## [17] Usual resident Usual resident Usual resident Usual resident
....
# Reminder: these are fake coordinates!
ke_gps <- st_read("data/ke_gps.shp", quiet = TRUE)
ke_gps
## Simple feature collection with 1594 features and 20 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 33.98758 ymin: -4.586276 xmax: 41.87459 ymax: 4.611987
## Geodetic CRS: WGS 84
## First 10 features:
## DHSID DHSCC DHSYEAR DHSCLUST CCFIPS ADM1FIPS ADM1FIPSNA ADM1SALBNA
## 1 KE201400000001 KE 2014 1 KE NULL NULL NULL
## 2 KE201400000002 KE 2014 2 KE NULL NULL NULL
## 3 KE201400000003 KE 2014 3 KE NULL NULL NULL
## 4 KE201400000004 KE 2014 4 KE NULL NULL NULL
## 5 KE201400000005 KE 2014 5 KE NULL NULL NULL
## 6 KE201400000006 KE 2014 6 KE NULL NULL NULL
## 7 KE201400000007 KE 2014 7 KE NULL NULL NULL
## 8 KE201400000008 KE 2014 8 KE NULL NULL NULL
## 9 KE201400000009 KE 2014 9 KE NULL NULL NULL
## 10 KE201400000010 KE 2014 10 KE NULL NULL NULL
## ADM1SALBCO ADM1DHS ADM1NAME DHSREGCO DHSREGNA SOURCE URBAN_RURA LATNUM
## 1 NULL 90 Nairobi 9 Nairobi CEN U -1.282723
## 2 NULL 90 Nairobi 9 Nairobi CEN U -1.278781
## 3 NULL 90 Nairobi 9 Nairobi CEN U -1.279646
## 4 NULL 90 Nairobi 9 Nairobi CEN U -1.280380
## 5 NULL 90 Nairobi 9 Nairobi CEN U -1.272064
## 6 NULL 90 Nairobi 9 Nairobi CEN U -1.263006
## 7 NULL 90 Nairobi 9 Nairobi CEN U -1.341546
## 8 NULL 90 Nairobi 9 Nairobi CEN U -1.312427
## 9 NULL 90 Nairobi 9 Nairobi CEN U -1.318072
## 10 NULL 90 Nairobi 9 Nairobi CEN U -1.315103
## LONGNUM ALT_GPS ALT_DEM DATUM geometry
## 1 36.75296 9999 1801 WGS84 POINT (35.71713 0.140332)
## 2 36.75844 9999 1801 WGS84 POINT (35.76638 0.6270388)
## 3 36.74593 9999 1808 WGS84 POINT (36.11351 0.4334558)
## 4 36.69709 9999 1894 WGS84 POINT (35.87447 0.4794212)
## 5 36.74313 9999 1816 WGS84 POINT (36.32321 0.1759624)
## 6 36.71059 9999 1920 WGS84 POINT (35.61286 0.5304094)
## 7 36.69719 9999 1887 WGS84 POINT (35.98676 1.239719)
## 8 36.78541 9999 1702 WGS84 POINT (36.05537 0.538201)
## 9 36.78468 9999 1710 WGS84 POINT (36.10384 1.115597)
## 10 36.80952 9999 1700 WGS84 POINT (36.07837 0.2047607)
ggplot() +
layer_spatial(ke_gps) +
theme_minimal()
ke_borders <- st_read(
"data/ke_boundaries/sdr_subnational_boundaries2.shp",
quiet = TRUE
)
theme_set(theme_minimal())
ggplot() +
layer_spatial(ke_borders) +
layer_spatial(ke_gps)
ggplot() +
layer_spatial(
ke_borders,
fill = "#F6E6CB", # Polygon fill color
color = "#7f7f7f", # Polygon outline color
linewidth = 0.5 # Polygon line width
) +
layer_spatial(
ke_gps,
alpha = 0.2, # Cluster point transparency
color = "#444444" # Cluster point color
) +
labs(title = "DHS Cluster Locations", subtitle = "Note: falsified coordinates")
library(chirps)
ke_chirts <- get_chirts(
vect(ke_borders), # Spatial range
dates = c("2013-01-01", "2013-12-31"), # Date range
var = "Tmax" # Desired temperature variable
)
ke_chirts <- rast("data/ke_chirts_2013.nc") # Our pre-saved CHIRTS NetCDF
ke_chirts
## class : SpatRaster
## dimensions : 198, 163, 365 (nrow, ncol, nlyr)
## resolution : 0.05, 0.05 (x, y)
## extent : 33.85, 42, -4.750001, 5.149999 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : ke_chirts_2013.nc
## names : ke_ch~013_1, ke_ch~013_2, ke_ch~013_3, ke_ch~013_4, ke_ch~013_5, ke_ch~013_6, ...
## time (days) : 2013-01-01 to 2013-12-31
ke_chirts[[1]]
## class : SpatRaster
## dimensions : 198, 163, 1 (nrow, ncol, nlyr)
## resolution : 0.05, 0.05 (x, y)
## extent : 33.85, 42, -4.750001, 5.149999 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source : ke_chirts_2013.nc
## name : ke_chirts_2013_1
## time (days) : 2013-01-01
# Get raster cell values (in this case, max temperature for that cell)
values(ke_chirts)
## ke_chirts_2013_1 ke_chirts_2013_2 ke_chirts_2013_3 ke_chirts_2013_4
## [1,] 36.04524 37.71585 36.70155 38.62380
## [2,] 35.98721 37.81504 36.77429 38.64823
## [3,] 35.83154 37.79731 36.73273 38.56688
## [4,] 35.45006 37.50497 36.42466 38.23553
....
# Number of layers
nlyr(ke_chirts)
## [1] 365
# Date that corresponds to each layer
time(ke_chirts)
## [1] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" "2013-01-05"
## [6] "2013-01-06" "2013-01-07" "2013-01-08" "2013-01-09" "2013-01-10"
## [11] "2013-01-11" "2013-01-12" "2013-01-13" "2013-01-14" "2013-01-15"
## [16] "2013-01-16" "2013-01-17" "2013-01-18" "2013-01-19" "2013-01-20"
## [21] "2013-01-21" "2013-01-22" "2013-01-23" "2013-01-24" "2013-01-25"
....
# Reclassify missing values
m <- matrix(c(-9999, NA), nrow = 1)
ke_chirts <- classify(ke_chirts, m)
ggplot() +
layer_spatial(ke_chirts[[1]], alpha = 0.9) +
layer_spatial(ke_borders, fill = NA, color = "#4c4c4c", linewidth = 0.5)
ke_gps[, "geometry"]
## Simple feature collection with 1594 features and 0 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 33.98758 ymin: -4.586276 xmax: 41.87459 ymax: 4.611987
## Geodetic CRS: WGS 84
## First 10 features:
## geometry
## 1 POINT (35.71713 0.140332)
## 2 POINT (35.76638 0.6270388)
## 3 POINT (36.11351 0.4334558)
## 4 POINT (35.87447 0.4794212)
## 5 POINT (36.32321 0.1759624)
## 6 POINT (35.61286 0.5304094)
## 7 POINT (35.98676 1.239719)
## 8 POINT (36.05537 0.538201)
## 9 POINT (36.10384 1.115597)
## 10 POINT (36.07837 0.2047607)
ke_gps <- st_transform(ke_gps, crs = 32637)
ke_gps[, "geometry"]
## Simple feature collection with 1594 features and 0 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -58474.75 ymin: -506942.7 xmax: 819246.2 ymax: 510915
## Projected CRS: WGS 84 / UTM zone 37N
## First 10 features:
## geometry
## 1 POINT (134498.7 15536.57)
## 2 POINT (140008.4 69417.92)
## 3 POINT (178677.7 47971.17)
## 4 POINT (152044.8 53069.94)
## 5 POINT (202032 19470.52)
## 6 POINT (122890.9 58729.47)
## 7 POINT (164623.8 137217.3)
## 8 POINT (172205.7 59566.57)
## 9 POINT (177653 123465.8)
## 10 POINT (174755.5 22661.86)
ggplot() +
layer_spatial(ke_gps)
ke_buffer <- st_buffer(ke_gps, dist = 10000)
ke_buffer[, "geometry"]
## Simple feature collection with 1594 features and 0 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -68474.75 ymin: -516942.7 xmax: 829246.2 ymax: 520915
## Projected CRS: WGS 84 / UTM zone 37N
## First 10 features:
## geometry
## 1 POLYGON ((144498.7 15536.57...
## 2 POLYGON ((150008.4 69417.92...
## 3 POLYGON ((188677.7 47971.17...
## 4 POLYGON ((162044.8 53069.94...
## 5 POLYGON ((212032 19470.52, ...
## 6 POLYGON ((132890.9 58729.47...
## 7 POLYGON ((174623.8 137217.3...
## 8 POLYGON ((182205.7 59566.57...
## 9 POLYGON ((187653 123465.8, ...
## 10 POLYGON ((184755.5 22661.86...
ggplot() +
layer_spatial(ke_buffer, alpha = 0)
ke_buffer <- st_transform(ke_buffer, crs = 4326) # WGS 84 CRS
ke_chirts_mean <- mean(ke_chirts)
ke_chirts_mean
## class : SpatRaster
## dimensions : 198, 163, 1 (nrow, ncol, nlyr)
## resolution : 0.05, 0.05 (x, y)
## extent : 33.85, 42, -4.750001, 5.149999 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source(s) : memory
## name : mean
## min value : 10.84893
## max value : 41.83465
theme_set(theme_dhs_base())
ggplot() +
layer_spatial(
mask(ke_chirts_mean, ke_borders),
alpha = 0.9
) +
layer_spatial(ke_borders, fill = NA, color = "#4c4c4c", linewidth = 0.5) +
labs(title = "Mean Temperature (°C)", subtitle = "Kenya, 2013", fill = "") +
scale_fill_chirts_c()
## Warning: Removed 13057 rows containing missing values or values outside the scale range
## (`geom_raster()`).
# Average daily CHIRTS within months
ke_chirts_mean_mnths <- tapp(
ke_chirts,
fun = mean,
index = "yearmonths"
)
ke_chirts_mean_mnths
## class : SpatRaster
## dimensions : 198, 163, 12 (nrow, ncol, nlyr)
## resolution : 0.05, 0.05 (x, y)
## extent : 33.85, 42, -4.750001, 5.149999 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source(s) : memory
## names : ym_201301, ym_201302, ym_201303, ym_201304, ym_201305, ym_201306, ...
## min values : 13.05012, 14.09646, 12.47642, 11.19747, 11.40163, 8.428434, ...
## max values : 42.44487, 43.45883, 43.19976, 40.33416, 41.96343, 40.689007, ...
## time (ymnts): 2013-Jan to 2013-Dec
months <- c("January", "February", "March", "April",
"May", "June", "July", "August",
"September", "October", "November", "December")
panels <- purrr::map2(
split_raster(ke_chirts_mean_mnths),
months,
function(r, m) {
chirts_panel_continuous(
r,
borders = ke_borders,
panel_title = m,
show_scale = FALSE,
fill_lab = "",
limits = c(7, 44)
)
}
)
patchwork::wrap_plots(panels) +
patchwork::plot_layout(guides = "collect", ncol = 6) +
patchwork::plot_annotation(
title = "Average Monthly Temperature (°C)",
subtitle = "Kenya, 2013",
caption = "Source: Climate Hazards Center"
)
# List all 12 monthly average files
chirts_mnth_means <- list.files("data/chirts", full.names = TRUE)
# Load monthly averages into a single raster stack
chirts_norm <- rast(chirts_mnth_means)
# Subtract 2013 monthly average from 10-year monthly average
chirts_dev <- ke_chirts_mean_mnths - chirts_norm
chirts_dev
## class : SpatRaster
## dimensions : 198, 163, 12 (nrow, ncol, nlyr)
## resolution : 0.05, 0.05 (x, y)
## extent : 33.85, 42, -4.750001, 5.149999 (xmin, xmax, ymin, ymax)
## coord. ref. : lon/lat WGS 84 (EPSG:4326)
## source(s) : memory
## names : ym_201301, ym_201302, ym_201303, ym_201304, ym_201305, ym_201306, ...
## min values : -0.7652607, -0.5505671, -1.6592527, -1.94416453, -1.233843, -1.9243449, ...
## max values : 1.2226097, 0.9732178, 0.9714775, 0.09900742, 1.311446, 0.8205259, ...
## time (ymnts): 2013-Jan to 2013-Dec
ggplot() +
layer_spatial(chirts_dev[[5]]) +
scale_fill_gradient2(
low = "#438b81",
mid = "#f0ebd7",
high = "#c25d33",
limits = c(-2, 2),
na.value = "transparent"
) +
labs(
title = "Difference from 10-year average temperature (°C)",
subtitle = "May, 2013",
fill = ""
)
panels <- purrr::map2(
split_raster(chirts_dev),
months,
function(r, m) {
chirts_panel_diverging(
r,
borders = ke_borders,
panel_title = m,
show_scale = FALSE,
fill_lab = "",
low = "#438b81", mid = "#f0ebd7", high = "#c25d33",
limits = c(-2, 2),
na.value = "transparent"
)
}
)
patchwork::wrap_plots(panels) +
patchwork::plot_layout(guides = "collect", ncol = 6) +
patchwork::plot_annotation(
title = "Deviation from 10-Year Monthly Average (°C)",
subtitle = "Kenya, 2013",
caption = "Source: Climate Hazards Center"
)
ke_mean_to_plot <- mask(ke_chirts_mean_mnths[[1]], ke_borders)
ggplot() +
layer_spatial(ke_mean_to_plot, alpha = 0.9) +
layer_spatial(ke_borders, fill = NA, color = "#444444") +
layer_spatial(ke_buffer, fill = NA, color = alpha("black", 0.5)) +
labs(title = "Mean Temperature (°C)", subtitle = "January, 2013", fill = "") +
scale_fill_chirts_c()
## Warning: Removed 13057 rows containing missing values or values outside the scale range
## (`geom_raster()`).
# Select a single cluster to demonstrate
clust1 <- ke_buffer |>
filter(DHSID == "KE201400001487")
ggplot() +
layer_spatial(crop(ke_chirts_mean_mnths[[1]], clust1, snap = "out")) +
layer_spatial(clust1, fill = NA, color = "black") +
labs(title = "Mean Temperature (°C)", subtitle = "Single Cluster", fill = "") +
scale_fill_chirts_c()
extract(ke_chirts_mean_mnths[[1]], clust1)
## ID ym_201301
## 1 1 35.12662
## 2 1 34.01415
## 3 1 32.42788
## 4 1 35.06354
## 5 1 33.72320
## 6 1 32.87634
## 7 1 34.81424
## 8 1 33.69226
## 9 1 33.22820
## 10 1 33.79419
extract(
ke_chirts_mean_mnths[[1]],
clust1,
weights = TRUE
)
## ID ym_201301 weight
## 1 1 35.65890 0.02
## 2 1 35.12662 0.69
## 3 1 34.01415 1.00
## 4 1 32.42788 0.69
## 5 1 33.32614 0.02
## 6 1 35.59715 0.27
## 7 1 35.06354 1.00
## 8 1 33.72320 1.00
## 9 1 32.87634 1.00
## 10 1 33.62588 0.27
## 11 1 35.55475 0.14
## 12 1 34.81424 0.98
## 13 1 33.69226 1.00
## 14 1 33.22820 0.98
## 15 1 33.50158 0.14
## 16 1 34.91669 0.24
## 17 1 33.79419 0.54
## 18 1 33.35481 0.24
extract(
ke_chirts_mean_mnths[[1]],
clust1,
weights = TRUE,
fun = mean
)
## ID ym_201301
## 1 1 33.95651
ke_chirts_clust <- extract(
ke_chirts_mean_mnths, # Extract values for each month of temperature data
ke_buffer, # Use all cluster polygons
weights = TRUE,
fun = mean
)
# Update to use cluster IDs
ke_chirts_clust$ID <- ke_buffer$DHSID
ke_chirts_clust
## ID ym_201301 ym_201302 ym_201303 ym_201304 ym_201305 ym_201306
## 1 KE201400000001 25.50097 26.93703 26.74054 23.99621 24.73310 23.18324
## 2 KE201400000002 27.44769 28.71789 28.42848 25.70832 26.39564 25.03732
## 3 KE201400000003 31.52870 32.83890 32.51355 29.73158 30.76798 29.27555
## 4 KE201400000004 30.59242 32.00248 31.70173 28.82798 29.88593 28.54942
....
ke_chirts_long <- ke_chirts_clust |>
pivot_longer(
cols = -ID, # Do not pivot the ID col
names_to = "CHIRTS_DATE", # Rename output columns
values_to = "MEAN_TEMP"
)
ke_chirts_long
## # A tibble: 19,128 × 3
## ID CHIRTS_DATE MEAN_TEMP
## <chr> <chr> <dbl>
## 1 KE201400000001 ym_201301 25.5
## 2 KE201400000001 ym_201302 26.9
## 3 KE201400000001 ym_201303 26.7
## 4 KE201400000001 ym_201304 24.0
## 5 KE201400000001 ym_201305 24.7
## 6 KE201400000001 ym_201306 23.2
## 7 KE201400000001 ym_201307 23.4
## 8 KE201400000001 ym_201308 23.0
## 9 KE201400000001 ym_201309 24.5
## 10 KE201400000001 ym_201310 25.1
## # ℹ 19,118 more rows
ke_dhs$KIDDOBCMC
## [1] 1364 1331 1356 1333 1348 1352 1321 1331 1351 1376 1333 1354 1318 1372
## [15] 1333 1318 1367 1363 1359 1341 1318 1347 1371 1351 1320 1317 1337 1363
## [29] 1330 1364 1373 1356 1335 1349 1320 1332 1330 1348 1366 1341 1346 1324
## [43] 1334 1327 1334 1316 1321 1364 1366 1352 1354 1363 1369 1351 1351 1362
## [57] 1331 1331 1345 1327 1344 1327 1340 1322 1344 1319 1315 1372 1363 1326
....
ke_chirts_long$CHIRTS_DATE
## [1] "ym_201301" "ym_201302" "ym_201303" "ym_201304" "ym_201305" "ym_201306"
## [7] "ym_201307" "ym_201308" "ym_201309" "ym_201310" "ym_201311" "ym_201312"
## [13] "ym_201301" "ym_201302" "ym_201303" "ym_201304" "ym_201305" "ym_201306"
## [19] "ym_201307" "ym_201308" "ym_201309" "ym_201310" "ym_201311" "ym_201312"
## [25] "ym_201301" "ym_201302" "ym_201303" "ym_201304" "ym_201305" "ym_201306"
....
ipums_var_desc(ke_ddi, "KIDDOBCMC")
## [1] "KIDDOBCMC (B3) reports the century month code for the date of birth of the child. \n\nCentury month codes (CMC) are particularly useful for checking the consistency of dates, calculating intervals between events, and imputing dates when the information for an event is missing or partially complete.\n\nCentury month codes (CMC) are calculated by multiplying by 12 the difference between the year of an event and 1900. The year 1900 was chosen as the reference period because all of the DHS-relevant events occurred during the twentieth or twenty-first centuries. The month of the event is added to the previous result.\nCMC = (Year minus 1900) * 12 + MonthFor example, the CMC for June 2002 is:\nCMC = (2002 minus 1900) * 12 + 6 = 1230In other words, 1,230 months have elapsed between January 1900 and June 2002. Starting with CMC figures, one can calculate the month and year using the following formulas:\nYear = int( ( CMC minus 1 )/12 ) + 1900[int(x) is the integer part of x]Month = CMC minus ( ( Year minus 1900 ) * 12 )For more information, visit The DHS Program website to find the latest Guide to DHS Statistics."
# Replace "ym_" with ""
ke_chirts_long <- ke_chirts_long |>
mutate(CHIRTS_DATE = str_replace(CHIRTS_DATE, "ym_", ""))
ke_chirts_long
## # A tibble: 19,128 × 3
## ID CHIRTS_DATE MEAN_TEMP
## <chr> <chr> <dbl>
## 1 KE201400000001 201301 25.5
## 2 KE201400000001 201302 26.9
## 3 KE201400000001 201303 26.7
## 4 KE201400000001 201304 24.0
## 5 KE201400000001 201305 24.7
## 6 KE201400000001 201306 23.2
## 7 KE201400000001 201307 23.4
## 8 KE201400000001 201308 23.0
## 9 KE201400000001 201309 24.5
## 10 KE201400000001 201310 25.1
## # ℹ 19,118 more rows
ke_chirts_long <- ke_chirts_long |>
mutate(CHIRTS_DATE = ym(CHIRTS_DATE))
ke_chirts_long
## # A tibble: 19,128 × 3
## ID CHIRTS_DATE MEAN_TEMP
## <chr> <date> <dbl>
## 1 KE201400000001 2013-01-01 25.5
## 2 KE201400000001 2013-02-01 26.9
## 3 KE201400000001 2013-03-01 26.7
## 4 KE201400000001 2013-04-01 24.0
## 5 KE201400000001 2013-05-01 24.7
## 6 KE201400000001 2013-06-01 23.2
## 7 KE201400000001 2013-07-01 23.4
## 8 KE201400000001 2013-08-01 23.0
## 9 KE201400000001 2013-09-01 24.5
## 10 KE201400000001 2013-10-01 25.1
## # ℹ 19,118 more rows
ke_chirts_long <- ke_chirts_long |>
mutate(CHIRTS_DATE = (year(CHIRTS_DATE) - 1900) * 12 + month(CHIRTS_DATE))
ke_chirts_long
## # A tibble: 19,128 × 3
## ID CHIRTS_DATE MEAN_TEMP
## <chr> <dbl> <dbl>
## 1 KE201400000001 1357 25.5
## 2 KE201400000001 1358 26.9
## 3 KE201400000001 1359 26.7
## 4 KE201400000001 1360 24.0
## 5 KE201400000001 1361 24.7
## 6 KE201400000001 1362 23.2
## 7 KE201400000001 1363 23.4
## 8 KE201400000001 1364 23.0
## 9 KE201400000001 1365 24.5
## 10 KE201400000001 1366 25.1
## # ℹ 19,118 more rows
ke_chirts_long <- ke_chirts_clust |>
pivot_longer(
cols = -ID,
names_to = "CHIRTS_DATE",
values_to = "MEAN_TEMP"
) |>
mutate(
CHIRTS_DATE = str_replace(CHIRTS_DATE, "ym_", ""),
CHIRTS_DATE = ym(CHIRTS_DATE),
CHIRTS_DATE = (year(CHIRTS_DATE) - 1900) * 12 + month(CHIRTS_DATE)
)
# Cluster-level monthly CHIRTS data
ke_chirts_long
## # A tibble: 19,128 × 3
## ID CHIRTS_DATE MEAN_TEMP
## <chr> <dbl> <dbl>
## 1 KE201400000001 1357 25.5
## 2 KE201400000001 1358 26.9
## 3 KE201400000001 1359 26.7
## 4 KE201400000001 1360 24.0
## 5 KE201400000001 1361 24.7
## 6 KE201400000001 1362 23.2
## 7 KE201400000001 1363 23.4
## 8 KE201400000001 1364 23.0
## 9 KE201400000001 1365 24.5
## 10 KE201400000001 1366 25.1
## # ℹ 19,118 more rows
# Child-level tabular DHS survey data
ke_dhs[, c("DHSID", "KIDID", "KIDDOBCMC", "BIRTHWT")]
## # A tibble: 5,765 × 4
## DHSID KIDID KIDDOBCMC BIRTHWT
## <chr> <chr> <dbl> <dbl>
## 1 KE201400000001 40406 0001033 02 1 1364 3.2
## 2 KE201400000001 40406 0001033 02 2 1331 3.5
## 3 KE201400000002 40406 0002051 02 1 1356 3
## 4 KE201400000002 40406 0002051 02 2 1333 2.3
## 5 KE201400000003 40406 0003023 02 1 1348 3.2
## 6 KE201400000003 40406 0003047 02 1 1352 3.2
## 7 KE201400000003 40406 0003056 02 3 1321 2.8
## 8 KE201400000003 40406 0003072 02 1 1331 4.5
## 9 KE201400000004 40406 0004074 02 1 1351 2
## 10 KE201400000005 40406 0005011 02 1 1376 3.2
## # ℹ 5,755 more rows
inner_join(
ke_dhs,
ke_chirts_long
)
## Error in `inner_join()`:
## ! `by` must be supplied when `x` and `y` have no common variables.
## ℹ Use `cross_join()` to perform a cross-join.
ke_dhs_chirts <- inner_join(
ke_dhs,
ke_chirts_long,
by = c("DHSID" = "ID", "KIDDOBCMC" = "CHIRTS_DATE")
)
# Each child is now associated with monthly average temperature for their
# birth month
ke_dhs_chirts |>
select(DHSID, KIDID, KIDDOBCMC, MEAN_TEMP, BIRTHWT)
## # A tibble: 1,253 × 5
## DHSID KIDID KIDDOBCMC MEAN_TEMP BIRTHWT
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 KE201400000001 40406 0001033 02 1 1364 23.0 3.2
## 2 KE201400000007 40406 0007056 02 1 1367 28.9 3
## 3 KE201400000008 40406 0008021 02 1 1363 31.1 3.8
## 4 KE201400000008 40406 0008052 02 1 1359 33.9 2.95
## 5 KE201400000010 40406 0010108 01 1 1363 28.9 3.26
## 6 KE201400000010 40406 0010108 04 1 1364 28.5 2.3
## 7 KE201400000012 40406 0012061 02 1 1366 32.2 4.9
## 8 KE201400000017 40406 0017030 02 1 1364 20.4 3.9
## 9 KE201400000017 40406 0017046 02 1 1366 22.3 2.5
## 10 KE201400000018 40406 0018005 02 1 1363 21.5 3.2
## # ℹ 1,243 more rows
ggplot(ke_dhs_chirts) +
geom_point(aes(x = MEAN_TEMP, y = BIRTHWT), alpha = 0.5, size = 2)
## Warning: Removed 31 rows containing missing values or values outside the scale range
## (`geom_point()`).